home *** CD-ROM | disk | FTP | other *** search
- #!/usr/local/bin/gawk -f
- # address: find lines with matching name field in an address file
- # gawk is used because it has IGNORECASE.
- # @(#) address.gawk 2.6.1 97/07/26
- # johnd h. dubois iii (john@armory.com) 90/05/30
- # 90/11/14 removed ksh-specific code
- # 91/07/06 Changed to understand new record format (uses awk now),
- # changed syntax so that all normal args are components of
- # a single name and each name given must be matched as a word
- # 91/09/30 finished rewrite for new record format
- # 91/10/10 Added phone number dereferencing; made empty fields be
- # translated into single blanks so they'll be preserved when
- # split on one-or-more newlines (for sh & ksh, which have no
- # facility to split on single instances of a character); added -@
- # 91/11/11 changed to use [] to indicate an indirect reference,
- # expanded indirect reference check/lookup to include fields 2..4
- # (address, phone number, email address)
- # 91/11/13 Awk prog too big for XENIX cmd line arg;
- # separated into sh & awk files
- # 92/01/22 Added LIB to gawk line
- # 92/04/27 Converted to #!gawk script
- # 92/07/19 Converted to handle multiple numbers in phone number field
- # 92/10/16 minor bugfix
- # 93/05/02 Allow colons as separators in $ADDRESS
- # 93/09/02 Added -l option
- # 94/03/13 Cleaned up. Added comments. Print matching records at end.
- # Worked around gawk bugs.
- # 94/04/23 Read .addrrc
- # 96/01/21 Use $UHOME/.addrrc and $UHOME/.address
- # 96/05/06 Let % and @ be used as abbreviations in ADDRESS path.
- # Added p option.
- # 97/07/26 2.6.1 Added M option.
-
- # todo: allow searching on fields other than the name field.
-
- # Main program.  Parses the command line (and rc files / environment) with
- # Opts(), prints the help text (-h) or the address-file format description
- # (-D) when asked, and otherwise searches the selected address files for the
- # name(s) given on the command line and prints every matching record.
- # All diagnostics are written to /dev/stderr; exit status is 1 on error.
- BEGIN {
-     Name = "address"
-     defPub = "/usr/local/public/address"
-     Usage = "Usage: " Name " [-dDehlMp@] [-a<filename[:filename...]>] name ..."
-     rcFile = ".addrrc"
-     ARGC = Opts(Name,Usage,"a:del@DhpxM",1,"~/" rcFile ":$UHOME/" rcFile,
-     "ADDRESS,DEREFERENCE,PRINTREGEX,PRINTFILENAME,MATCHAT",1)
-     # -x is an undocumented debugging flag
-     Debug = Options["x"]
-     if (ARGC == -1) {
-         # Report option errors on stderr, like every other diagnostic here
-         print Name ": " OptErr > "/dev/stderr"
-         exit(1)
-     }
-     if ("h" in Options) {
-         printf \
- "%s: find lines with matching name field in address files.\n"\
- "%s\n"\
- "%s searches address files for a given name, and prints any records that\n"\
- "have a matching name field. By default, any of the following that exist\n"\
- "are searched: the public address file %s, the file named\n"\
- ".address in the invoking user's home directory, and the file named\n"\
- ".address in the directory specified by the environment variable UHOME, if\n"\
- "it is set.\n"\
- "Name specification:\n"\
- "Matching is not case sensitive. The search name will match a name field\n"\
- "if each part of the search name occurs in the same order as given and as a\n"\
- "separate word in the name field. The search name will match a name field\n"\
- "even if other text occurs in the name field between the parts of the\n"\
- "search name. The parts of the search name should be separated by\n"\
- "whitespace on the command line, and should be quoted if they contain\n"\
- "characters meaningful to the shell. @ and : can be matched by either @ or\n"\
- ":. Blank fields are printed as lines consisting of a single space. If\n"\
- "multiple matching records are found, they are printed separated by lines\n"\
- "consisting solely of '+'.\n"\
- "Options:\n"\
- "Some of the following options can also be set by assigning values to\n"\
- "variables in a configuration file named %s, which is searched for in the\n"\
- "invoking user's home directory and in the directory specified by the\n"\
- "environment variable UHOME, if it is set (if both files exist, values set\n"\
- "in the former take precedence). Variables are assigned to with the\n"\
- "syntax: varname=value or in the case of flags, by simply putting the\n"\
- "indicated variable name in the file without a value. Variable names are\n"\
- "given in parentheses following the option descriptions.\n"\
- "-h: Print this help list.\n"\
- "-a<filename[:filename...]>: Search the named file(s) instead of the\n"\
- " default file. Multiple filenames may be given by separating them from\n"\
- " each other with whitespace or colons. If -a is not given, the value\n"\
- " of the environment variable ADDRESS is used if set; and if that is not\n"\
- " set, the value set in the configuration file is used. If none of\n"\
- " these are set, the default files listed above are used. If any\n"\
- " files are explicitly named, the default files are not automatically\n"\
- " included; if they should be searched as well, they should be named\n"\
- " along with whatever other files are named. The public address file\n"\
- " may be abbreviated as '%%', and the default private files as '@'.\n"\
- "-d: Dereference indirect entries (those that are contained in brackets).\n"\
- " The string contained in brackets ([]) is looked up. The name field of\n"\
- " another record must match it exactly. The indirect entry, including\n"\
- " the enclosing brackets, is replaced by the entire equivalent field of\n"\
- " the referenced record. One level of dereferencing is done.\n"\
- " (DEREFERENCE)\n"\
- "-e: Print the regular expression used to search the name field to the\n"\
- " standard output (PRINTREGEX).\n"\
- "-D: Print a description of the format of an address file.\n"\
- "-l: Print the names of the files each entry was found in (PRINTFILENAME).\n"\
- "-p: Use the public address file (%s) only; equivalent to -a%%\n"\
- "-M: Multiple pattern search. By default, the arguments passed are searched\n"\
- " for as a single pattern. If -M is given, each argument is searched for\n"\
- " separately. If a search pattern contains whitespace, it must be quoted\n"\
- " so that the shell will pass it as a single argument.\n"\
- "-@: Require that any @ or : in the name field be matched by the pattern.\n"\
- " (MATCHAT).\n",Name,Usage,Name,defPub,rcFile,defPub
-         exit(0)
-     }
-     if ("D" in Options) {
-         print \
- "Format of an address file: an address file consists of records separated\n"\
- "by delimiter lines consisting solely of a '+' character. The address file\n"\
- "must also begin and end with lines consisting solely of a '+' character.\n"\
- "The '+' character may not occur anywhere in the file except as a delimiter\n"\
- "line. Each line of the record is a field. The fields are used as follows:\n"\
- "+\n"\
- "Name\n"\
- "Address\n"\
- "Phone number\n"\
- "Email address\n"\
- "Comment ...\n"\
- "+\n"\
- "Comment can continue onto multiple lines. Unused fields can be left\n"\
- "blank; unused fields at the end of the record do not need to be given. \n"\
- Name " only interprets the name and (possibly) phone number fields.\n"\
- "An indirect reference can be given by specifying in a field\n"\
- "[indirect-name]\n"\
- "where indirect-name is an exact match for the name field of another record.\n"\
- "Other text may be included on the same line as indirect-name.\n"\
- "This can be used to add an extension to a PBX number, etc."
-         exit(0)
-     }
-     # At least one name component must remain after option processing
-     if (ARGC < 2) {
-         print Usage > "/dev/stderr"
-         exit(1)
-     }
-     # -p is shorthand for -a%
-     if ("p" in Options)
-         Options["a"] = "%"
-     GetFiles(Files,Options)
-     NumMatches = SearchFiles(ARGV,"e" in Options,Files,MatchingRecords,
-     FileFound,"M" in Options)
-     PrintFilename = "l" in Options
-     Dereference = "d" in Options
-     # Records are printed only after all searching is done
-     for (i = 1; i <= NumMatches; i++)
-         PrintRec(MatchingRecords[i],FileFound[i],PrintFilename,Dereference)
- }
-
- # FileExists: return true if a record can be read from File.
- # An empty File name is reported as existing.  A file that exists but is
- # empty or unreadable is reported as not existing, since no record can be
- # read from it.  The probe record is read into a local variable so that
- # $0 and NF are not clobbered as a side effect.
- function FileExists(File, ret,junk) {
-     ret = (File == "") || ((getline junk < File) == 1)
-     # Close so that a later reader starts again at the top of the file
-     close(File)
-     return ret
- }
-
- # GetPubFile: append the public address file (global defPub) to Files[]
- # unless it has already been recorded in iFiles[] or cannot be read.
- # nFiles is the number of entries already in Files[]; the (possibly
- # incremented) count is returned.  A file that is added is also made an
- # index of iFiles[].
- function GetPubFile(Files,nFiles,iFiles, pub) {
-     pub = defPub
-     if (pub in iFiles)
-         return nFiles
-     if (!FileExists(pub))
-         return nFiles
-     nFiles++
-     Files[nFiles] = pub
-     iFiles[pub]
-     return nFiles
- }
-
- # GetPrivFiles: append the private address files to Files[].
- # For each of the environment variables HOME and UHOME (in that order) that
- # is set, the file .address in that directory is added if it has not already
- # been recorded in iFiles[] and can be read.  Returns the updated file count.
- function GetPrivFiles(Files,nFiles,iFiles, path,vars,nVars,k) {
-     nVars = split("HOME UHOME",vars," ")
-     for (k = 1; k <= nVars; k++) {
-         if (!(vars[k] in ENVIRON))
-             continue
-         path = ENVIRON[vars[k]] "/.address"
-         if ((path in iFiles) || !FileExists(path))
-             continue
-         Files[++nFiles] = path
-         iFiles[path]
-     }
-     return nFiles
- }
-
- # GetFiles: build the list of address files to search in Files[1..n].
- # If Options["a"] is set it is a list of file names separated by blanks,
- # tabs or colons; "%" stands for the public address file and "@" for the
- # default private files.  Each name is used at most once.  Explicitly named
- # files are not checked for existence here.
- # If Options["a"] is not set, the default public and private files that
- # exist are used; if there are none, an error is printed and the program
- # exits with status 1.
- # Uses globals: Name, Debug.
- function GetFiles(Files,Options, i,nFiles,aFiles,file,iFiles) {
-     nFiles = 0
-     if ("a" in Options) {
-         split(Options["a"],aFiles,"[ \t:]+")
-         for (i = 1; i in aFiles; i++) {
-             if ((file = aFiles[i]) == "" || file in iFiles)
-                 continue
-             # Record the name itself (not its position in the list) so that
-             # a repeated name is recognized and skipped.
-             iFiles[file]
-             if (file == "%")
-                 nFiles = GetPubFile(Files,nFiles,iFiles)
-             else if (file == "@")
-                 nFiles = GetPrivFiles(Files,nFiles,iFiles)
-             else
-                 Files[++nFiles] = file
-         }
-     }
-     else {
-         # If no files specified on command line and $ADDRESS not set,
-         # use the default files (but only those that exist)
-         nFiles = GetPubFile(Files,nFiles,iFiles)
-         nFiles = GetPrivFiles(Files,nFiles,iFiles)
-         if (!nFiles) {
-             print Name ": No address files." > "/dev/stderr"
-             exit(1)
-         }
-     }
-     if (Debug) {
-         printf "%d address files:\n",nFiles > "/dev/stderr"
-         for (i = 1; i <= nFiles; i++)
-             printf "%s ",Files[i] > "/dev/stderr"
-         print "" > "/dev/stderr"
-     }
- }
-
- # SearchFiles: search Files[] for the name(s) held in NamePieces[1..n].
- # If Multiple is true each element of NamePieces[] is searched for as a
- # pattern of its own; otherwise the elements are joined with single spaces
- # and searched for as one pattern.  Matches are stored in MatchingRecords[]
- # and FileFound[] by Search().
- # Returns the number of matching records.
- function SearchFiles(NamePieces,PrintExpression,Files,
- MatchingRecords,FileFound,Multiple,
- Pattern,idx,total) {
-     if (Debug) {
-         print "Arguments are:" > "/dev/stderr"
-         idx = 1
-         while (idx in NamePieces) {
-             printf "%d:%s\n",idx,NamePieces[idx] > "/dev/stderr"
-             idx++
-         }
-     }
-     if (!Multiple) {
-         # Join all pieces into one space-separated pattern
-         for (idx = 1; idx in NamePieces; idx++)
-             Pattern = Pattern " " NamePieces[idx]
-         # Drop the leading space added by the first concatenation
-         return Search(substr(Pattern,2),PrintExpression,Files,
-         MatchingRecords,FileFound)
-     }
-     for (idx = 1; idx in NamePieces; idx++)
-         total += Search(\
-         NamePieces[idx],PrintExpression,Files,MatchingRecords,FileFound)
-     return total
- }
-
- # Search: search Files[] for records whose name field matches Name.
- # Name is converted to a search pattern (and optional qualifier pattern) by
- # MakePats(); if PrintExpression is true the patterns are written to
- # /dev/stderr.  Matches are appended to MatchingRecords[] / FileFound[] by
- # FindAddr(), whose return value (the number of matches) is returned.
- # RetVals is a local scratch array used to receive values from MakePats().
- function Search(Name,PrintExpression,Files,MatchingRecords,FileFound,
- RName,Qualifier,FirstName,RetVals) {
-     # Make sure awk treats RetVals as an array before it is passed on
-     RetVals["FirstName"] = ""
-     if (Debug)
-         printf "Searching for pattern \"%s\".\n",Name > "/dev/stderr"
-     RName = MakePats(Name,RetVals)
-     Qualifier = RetVals["Qualifier"]
-     FirstName = RetVals["FirstName"]
-     if (PrintExpression) {
-         print "Name pattern: " RName > "/dev/stderr"
-         if (Qualifier != "")
-             print "Qualifier: " Qualifier > "/dev/stderr"
-     }
-     return FindAddr(Files,FirstName,RName,Qualifier,MatchingRecords,FileFound)
- }
-
- # Setup: initialize the globals used for reading address files and for
- # building search patterns.
- # Sets RS, FS, OFS and IGNORECASE; the character-class strings AN, NAN,
- # NotAN and NotNAN; the word-type codes an and nan; and the Delim[] table.
- # NOTE(review): the variable a is interpolated into two of the classes but
- # is not assigned anywhere in this function; if it is unset it contributes
- # nothing to them.
- function Setup() {
-     # Matching is case-insensitive; records are separated by "+" and each
-     # line of a record is a field.
-     IGNORECASE = 1
-     RS = "+"
-     FS = "\n"
-     OFS = FS
-     AN = "[a-z0-9]" # alphanum
-     NotNAN = "[ \ta-z0-9]" # not-nonalphanum
-     NotAN = "[^a-z0-9" a "]" # not-alphanum
-     NAN = "[^ \ta-z0-9" a "]" # nonalphanum
-     # The type of a word is (Word ~ AN)
-     nan = 0
-     an = 1
-     # Delimiters for use at ends of line
-     Delim[nan] = NotNAN
-     Delim[an] = NotAN
-     # Delimiters between two words, indexed by the types of the left and
-     # right words
-     Delim[nan,nan] = NotNAN "(.*" NotNAN ")?"
-     Delim[nan,an] = "(" NotNAN ".*" NotAN "| )?"
-     Delim[an,nan] = "(" NotAN ".*" NotNAN "| )?"
-     Delim[an,an] = NotAN "(.*" NotAN ")?"
- }
-
- # MakePats: convert Name to a search pattern.
- # If Name contains @ or :, the part after the last @ or : is split off and
- # converted to a separate qualifier pattern.
- # Returns pattern qualifier in RetVals["Qualifier"] (the element is deleted
- # if Name has no qualifier), first name in RetVals["FirstName"].
- # Return value: Search pattern.
- # Calls Setup(), so the record-reading globals are (re)initialized too.
- # Qualifier is a local scratch variable.
- function MakePats(Name,RetVals, Qualifier) {
-     Setup()
-     if (Name ~ "[@:]") {
-         # Split the last [@:]-separated part off into Qualifier
-         Qualifier = Name
-         sub(".*[@:]","",Qualifier)
-         sub("[@:][^@:]*$","",Name)
-         gsub("[@:]","[@:]",Name) # make either : or @ match either : or @
-         RetVals["Qualifier"] = Name2SearchPat(Qualifier)
-     }
-     else
-         delete RetVals["Qualifier"]
-     return Name2SearchPat(Name,RetVals)
- }
-
- # Name2SearchPat: build a regular expression that matches the words of Name
- # in order, each as a separate word, with optional intervening text.
- # Uses global: Delim[]
- # Returns first name in pattern in RetVals["FirstName"]
- # Return value: Search pattern
- # Exits with status 1 if Name contains no words.
- function Name2SearchPat(Name,RetVals, Parts,Kinds,Count,k,Middle) {
-     # Tell awk that Parts and Kinds are arrays...
-     Kinds[1] = ""
-     Parts[1] = ""
-     Count = GetWords(Name,Parts,Kinds)
-     if (Count == 0) # Quit if no words given, just whitespace
-         exit 1
-     # Join the words, separating each adjacent pair with the delimiter
-     # pattern selected by the types of the two words.
-     Middle = ""
-     k = 1
-     while (k < Count) {
-         Middle = Middle Parts[k] Delim[Kinds[k],Kinds[k+1]]
-         k++
-     }
-     Middle = Middle Parts[Count]
-     RetVals["FirstName"] = Parts[1]
-     # Anchor both ends: start of line or a delimiter before the first word,
-     # a delimiter or end of line after the last word.
-     return "(^|" Delim[Kinds[1]] ")" Middle "(" Delim[Kinds[Count]] "|$)"
- }
-
- # GetWords: split S into words.
- # A word is a run of alphanums or a run of non-alphanums, as defined by the
- # global patterns AN and NAN (neither of which includes whitespace).  Words
- # end at whitespace or where an AN character meets a NAN character.
- # Words[1..n] receives the words and Types[1..n] the type of each word
- # (true if the word starts with an AN character).
- # The return value is the number of words found.
- function GetWords(S,Words,Types, n,Cut) {
-     # Squeeze runs of whitespace to one space and drop a trailing space
-     gsub("[ \t]+"," ",S)
-     gsub(" $","",S)
-     n = 0
-     while (S != "") {
-         sub("^ ","",S)
-         # Cut is the position of the last character of the first word, or 0
-         # if the rest of S is a single word.
-         Cut = match(S,". |" NAN AN "|" AN NAN)
-         n++
-         Types[n] = S ~ "^" AN
-         if (Cut) {
-             Words[n] = substr(S,1,Cut)
-             S = substr(S,Cut + 1)
-         }
-         else {
-             Words[n] = S
-             S = ""
-         }
-     }
-     return n
- }
-
- # GetEntry: search for an exact complete match of Name to the name field of
- # an entry in the address files (global Files[]).
- # Name is matched literally: regular-expression metacharacters in it are
- # escaped first.  Whether case is significant follows the current
- # IGNORECASE setting, as for any other regular expression match.
- # Returns the first matching record (without its leading newline), or null
- # if none found.
- # On success, if PrintFilename is true and the file that the matching record
- # was found in is not an index of IndFiles[], its name is printed and it is
- # made an index of IndFiles[].
- # Uses globals: Files[], Debug.
- function GetEntry(Name,PrintFilename,IndFiles,
- FileInd,File,Entry,ret,IndFile,Pat) {
-     # Quote every regex metacharacter so that e.g. "." or "(" in a name is
-     # matched as itself.
-     Pat = Name
-     gsub("[][\\\\^$.|?*+(){}]","\\\\&",Pat)
-     # A record starts with the newline that follows the "+" delimiter; the
-     # name field is the text up to the next newline.
-     Pat = "^\n" Pat "\n"
-     for (FileInd = 1; FileInd in Files; FileInd++) {
-         File = Files[FileInd]
-         # The file name does not need to be made different from the one
-         # used by FindAddr, because records are not printed until all
-         # searching is finished.
-         if (Debug)
-             printf "Checking file \"%s\" for name \"%s\"...\n",
-             File,Name > "/dev/stderr"
-         while ((ret = (getline Entry < File)) == 1) {
-             if (Entry ~ Pat) {
-                 close(File)
-                 if (Debug)
-                     printf "Found entry: %s\n",Entry > "/dev/stderr"
-                 sub("^\n","",Entry)
-                 IndFile = Files[FileInd]
-                 if (PrintFilename && !(IndFile in IndFiles)) {
-                     print IndFile
-                     IndFiles[IndFile]
-                 }
-                 return Entry
-             }
-         }
-         if (ret == -1)
-             printf "Could not open address file \"%s\".\n",Files[FileInd] > \
-             "/dev/stderr"
-         close(File)
-     }
-     return ""
- }
-
- # FindAddr: scan each file in Files[] and collect the records whose name field matches.
- # Sets MatchingRecords[n..m] to matching records, FileFound[n..m] to the file
- # each match is found in, and returns the number of records matched by this call.
- # n is the starting index. If MatchingRecords["start"] is not set, n is 1;
- # if it is set, n is its value+1 and it is updated to the last index used. Reads globals Options[], Debug.
- function FindAddr(Files,FirstName,RName,Qualifier,MatchingRecords,FileFound,
- Ext,i,ret,InFile,NumMatch,MatchInd) { # everything on this line is a local; Ext is never used
- if ("start" in MatchingRecords)
- MatchInd = MatchingRecords["start"]
- for (i = 1; i in Files; i++) {
- InFile = Files[i]
- while ((ret = (getline < InFile)) == 1) { # each record is read into $0 and split into fields
- # $1 is always empty (it is the field between the + and the
- # first newline), so $2 is the name field and $4 the phone number field.
- # Compare against FirstName first to avoid the expensive
- # comparison with RName for most records.
- if ( \
- ($2 ~ FirstName) && ($2 ~ RName) \
- &&
- # If the @ option was given, either a qualifier must be given in
- # the search pattern...
- ( \
- !("@" in Options) || (Qualifier != "") ||
- # ... or the name field must not have a qualifier and the first
- # number on the phone number field must not have a qualifier
- (($2 !~ "[@:]") && ($4 ~ "^(\\[|[^: ]+( |$))" )) \
- ) &&
- # If a qualifier is given in the search pattern,
- ((Qualifier == "") ||
- # it must match the text after the last @ or : in the name field (with RName matching the text before it)...
- match($2,".*[@:]") && (substr($2,1,RLENGTH - 1) ~ RName) &&
- (substr($2,RLENGTH + 1) ~ Qualifier) ||
- # ... or a qualifier in the phone number field
- (($4 = PhoneMatch($4,Qualifier)) != "")) ) { # assigning $4 rebuilds $0 with the matching number moved to the front
- MatchingRecords[++MatchInd] = $0
- FileFound[MatchInd] = InFile
- NumMatch++
- if (Debug)
- printf "Found match:\n%s\n",$0 > "/dev/stderr"
- }
- }
- if (ret == -1) { # getline reported an error for this file; give up
- printf "Error reading file \"%s\".\n",InFile > "/dev/stderr"
- exit(1)
- }
- close(InFile)
- }
- MatchingRecords["start"] = MatchInd # remembered so that a later call appends after these matches
- return NumMatch
- }
-
- # PhoneMatch: look for a phone number whose qualifier matches Qualifier.
- # Line is a phone number field holding one or more space-separated entries;
- # an entry may have the form qualifier:number.  The first entry whose text
- # before its last colon matches the pattern Qualifier is selected.
- # Returns Line rearranged so that the selected entry comes first, followed
- # by the other entries in their original order, separated by single spaces;
- # returns null if no entry matches.
- function PhoneMatch(Line,Qualifier, hit,k,Parts,nParts,Rest) {
-     nParts = split(Line,Parts," +")
-     for (hit = 1; hit <= nParts; hit++) {
-         # Skip entries that have no qualifier at all
-         if (!match(Parts[hit],".*:"))
-             continue
-         if (substr(Parts[hit],1,RLENGTH - 1) !~ Qualifier)
-             continue
-         Rest = ""
-         for (k = 1; k <= nParts; k++)
-             if (k != hit)
-                 Rest = Rest " " Parts[k]
-         return Parts[hit] Rest
-     }
-     return ""
- }
-
- # PrintRec: print one matching record, one field per line.
- # Record is the record as read (starting with a newline); InFile is the file
- # it came from, which is printed first if PrintFilename is true.
- # Fields 2-4 are checked for an indirect reference of the form [name]; the
- # referenced record is looked up with GetEntry().  If Dereference is true
- # the reference is replaced by the same field of the referenced record,
- # otherwise by "[name]->[value]".  If a reference cannot be resolved an
- # error is written to /dev/stderr and the program exits with status 1.
- # Uses/sets global _Plus, to arrange for each pair of records to be separated
- # by a + character alone on a line.
- function PrintRec(Record,InFile,PrintFilename,Dereference, FieldVal,
- NumFields,MainFields,FieldNum,IndirName,Fields,Indir,i,IndFiles,Entry,
- RefStart,RefLen) {
-     # Convince gawk that Fields is an array...
-     Fields["x"] = ""
-     # Print a "+" between records, but not before the first record
-     printf "%s",_Plus
-     _Plus = "+\n"
-     # Print source file if asked for
-     if (PrintFilename) {
-         print InFile
-         IndFiles[InFile]
-     }
-     # Get rid of the leading & trailing newlines
-     NumFields = split(substr(Record,2),MainFields,"\n")
-     if (MainFields[NumFields] == "")
-         NumFields--
-     # Dereference fields 2-4 (fields which do not exist will fail to match)
-     for (FieldNum = 2; FieldNum <= 4; FieldNum++) {
-         FieldVal = MainFields[FieldNum]
-         # A reference looks like: [replacement-record-name-field] other-stuff,
-         # where replacement-record-name-field is an exact match for the name
-         # field of another record.
-         # Find the reference, if any.
-         # gawk core dumps on this when the pattern is given in //, but not
-         # when given in quotes...
-         if (match(FieldVal,"\\[[^]]+\\]")) {
-             # Save the match position now, so that it does not depend on
-             # what the lookup below does with RSTART and RLENGTH.
-             RefStart = RSTART
-             RefLen = RLENGTH
-             IndirName = substr(FieldVal,RefStart + 1,RefLen - 2)
-             if ((Entry = \
-             GetEntry(IndirName,PrintFilename,IndFiles)) == "") {
-                 printf(\
-                 "Indirect reference not found for \"%s\"\n"\
-                 "in field %d of the following entry:\n%s\n",
-                 IndirName,FieldNum,substr(Record,2)) > "/dev/stderr"
-                 exit 1
-             }
-             split(Entry,Fields,"\n")
-             Indir = Fields[FieldNum]
-             # Replace indirect reference with dereferenced value
-             if (!Dereference)
-                 Indir = "[" IndirName "]->[" Indir "]"
-             MainFields[FieldNum] = \
-             substr(FieldVal,1,RefStart - 1) Indir \
-             substr(FieldVal,RefStart + RefLen)
-         }
-     }
-     for (i = 1; i <= NumFields; i++)
-         print MainFields[i]
- }
-
- ### Start of ProcArgs library
- # @(#) ProcArgs 1.12 97/05/26
- # 92/02/29 john h. dubois iii (john@armory.com)
- # 93/07/18 Added "#" arg type
- # 93/09/26 Do not count h option against MinArgs
- # 94/01/01 Stop scanning at first non-option arg. Added ">" option type.
- # Removed meaning of "+" or "-" by itself.
- # 94/03/08 Added & option and *()< option types.
- # 94/04/02 Added NoRCopt to Opts()
- # 94/06/11 Mark numeric variables as such.
- # 94/07/08 Opts(): Do not require any args if h option is given.
- # 95/01/22 Record options given more than once. Record option num in argv.
- # 95/06/08 Added ExclusiveOptions().
- # 96/01/20 Let rcfiles be a colon-separated list of filenames.
- # Expand $VARNAME at the start of its filenames.
- # Let varname=0 and [-+]option- turn off an option.
- # 96/05/05 Changed meaning of 7th arg to Opts; now can specify exactly how many
- # of the vars should be searched for in the environment.
- # Check for duplicate rcfiles.
- # 96/05/13 Return more specific error values. Note: ProcArgs() and InitOpts()
- # now return various negatives values on error, not just -1, and
- # Opts() may set Err to various positive values, not just 1.
- # Added AllowUnrecOpt.
- # 96/05/23 Check type given for & option
- # 96/06/15 Re-port to awk
- # 96/10/01 Moved file-reading code into ReadConfFile(), so that it can be
- # used by other functions.
- # 96/10/15 Added OptChars
- # 96/11/01 Added exOpts arg to Opts()
- # 96/11/16 Added ; type
- # 96/12/08 Added Opt2Set() & Opt2Sets()
- # 96/12/27 Added CmdLineOpt()
- # 97/02/22 Remove packed elements.
- # 97/02/28 Make sequence # for rcfiles & environ be "f" and "e".
- # Replaced CmdLineOpt() with OptsGiven().
- # 97/05/26 Added mangleHelp().
-
- # optlist is a string which contains all of the possible command line options.
- # A character followed by certain characters indicates that the option takes
- # an argument, with type as follows:
- # : String argument
- # ; Non-empty string argument
- # * Floating point argument
- # ( Non-negative floating point argument
- # ) Positive floating point argument
- # # Integer argument
- # < Non-negative integer argument
- # > Positive integer argument
- # The only difference the type of argument makes is in the runtime argument
- # error checking that is done.
-
- # The & option is a special case used to get numeric options without the
- # user having to give an option character. It is shorthand for [-+.0-9].
- # If & is included in optlist and an option string that begins with one of
- # these characters is seen, the value given to "&" will include the first
- # char of the option. & must be followed by a type character other than ":"
- # or ";".
- # Note that if e.g. &> is given, an option of -.5 will produce an error.
-
- # Strings in argv[] which begin with "-" or "+" are taken to be
- # strings of options, except that a string which consists solely of "-"
- # or "+" is taken to be a non-option string; like other non-option strings,
- # it stops the scanning of argv and is left in argv[].
- # An argument of "--" or "++" also stops the scanning of argv[] but is removed.
- # If an option takes an argument, the argument may either immediately
- # follow it or be given separately.
- # "-" and "+" options are treated the same. "+" is allowed because most awks
- # take any -options to be arguments to themselves. gawk 2.15 was enhanced to
- # stop scanning when it encounters an unrecognized option, though until 2.15.5
- # this feature had a flaw that caused problems in some cases. See the OptChars
- # parameter to explicitly set the option-specifier characters.
-
- # If an option that does not take an argument is given,
- # an index with its name is created in Options and its value is set to the
- # number of times it occurs in argv[].
-
- # If an option that does take an argument is given, an index with its name is
- # created in Options and its value is set to the value of the argument given
- # for it, and Options[option-name,"count"] is (initially) set to 1.
- # If an option that takes an argument is given more than once,
- # Options[option-name,"count"] is incremented, and the value is assigned to
- # the index (option-name,instance) where instance is 2 for the second occurrence
- # of the option, etc.
- # In other words, the first time an option with a value is encountered, the
- # value is assigned to an index consisting only of its name; for any further
- # occurrences of the option, the value index has an extra (count) dimension.
-
- # The sequence number for each option found in argv[] is stored in
- # Options[option-name,"num",instance], where instance is 1 for the first
- # occurrence of the option, etc. The sequence number starts at 1 and is
- # incremented for each option, both those that have a value and those that
- # do not. Options set from a config file get a sequence number of "f", and
- # options set in the environment get a sequence number of "e".
-
- # Options and their arguments are deleted from argv.
- # Note that this means that there may be gaps left in the indices of argv[].
- # If compress is nonzero, argv[] is packed by moving its elements so that
- # they have contiguous integer indices starting with 0.
- # Option processing will stop with the first unrecognized option, just as
- # though -- or ++ was given except that the unrecognized option will not be
- # removed from ARGV[]. Normally, an error value is returned in this case.
- # If AllowUnrecOpt is true, it is not an error for an unrecognized option to
- # be found, so the number of remaining arguments is returned instead.
- # If OptChars is not a null string, it is the set of characters that indicate
- # that an argument is an option string if the string begins with one of the
- # characters. A string consisting solely of two of the same option-indicator
- # characters stops the scanning of argv[]. The default is "-+".
- # argv[0] is not examined.
- # The number of arguments left in argc is returned.
- # If an error occurs, the global string OptErr is set to an error message
- # and a negative value is returned.
- # Current error values:
- # -1: option that required an argument did not get it.
- # -2: argument of incorrect type supplied for an option.
- # -3: unrecognized (invalid) option.
- # ProcArgs: scan argv[1..argc-1] for option strings described by OptList,
- # record each option in Options[] via AssignVal(), and delete the options
- # and their arguments from argv[].  Scanning stops at the first non-option
- # argument, at a terminator such as "--" (which is removed), or at an
- # unrecognized option.  If compress is nonzero the remaining arguments are
- # moved down so that they occupy argv[1], argv[2], ...
- # Returns the number of arguments left (counting argv[0]), or a negative
- # value with the global OptErr set on error.
- function ProcArgs(argc,argv,OptList,Options,compress,AllowUnrecOpt,OptChars,
- ArgNum,ArgsLeft,Arg,ArgLen,ArgInd,Option,Pos,NumOpt,Value,HadValue,specGiven,
- NeedNextOpt,GotValue,OptionNum,Escape,dest,src,count,c,OptTerm,OptCharSet)
- {
-     # ArgNum is the index of the argument being processed.
-     # ArgsLeft is the number of arguments left in argv.
-     # Arg is the argument being processed.
-     # ArgLen is the length of the argument being processed.
-     # ArgInd is the position of the character in Arg being processed.
-     # Option is the character in Arg being processed.
-     # Pos is the position in OptList of the option being processed.
-     # NumOpt is true if a numeric option may be given.
-     ArgsLeft = argc
-     NumOpt = index(OptList,"&")
-     OptionNum = 0
-     if (OptChars == "")
-         OptChars = "-+"
-     # Build the set of option-introducing characters and the set of
-     # doubled-character terminators ("--", "++", ...).
-     while (OptChars != "") {
-         c = substr(OptChars,1,1)
-         OptChars = substr(OptChars,2)
-         OptCharSet[c]
-         OptTerm[c c]
-     }
-     for (ArgNum = 1; ArgNum < argc; ArgNum++) {
-         Arg = argv[ArgNum]
-         if (length(Arg) < 2 || !((specGiven = substr(Arg,1,1)) in OptCharSet))
-             break # Not an option; quit
-         if (Arg in OptTerm) {
-             delete argv[ArgNum]
-             ArgsLeft--
-             break
-         }
-         ArgLen = length(Arg)
-         for (ArgInd = 2; ArgInd <= ArgLen; ArgInd++) {
-             Option = substr(Arg,ArgInd,1)
-             if (NumOpt && Option ~ /[-+.0-9]/) {
-                 # If this option is a numeric option, make its flag be & and
-                 # its option string flag position be the position of & in
-                 # the option string.
-                 Option = "&"
-                 Pos = NumOpt
-                 # Prefix Arg with a char so that ArgInd will point to the
-                 # first char of the numeric option.
-                 Arg = "&" Arg
-                 ArgLen++
-             }
-             # Find position of flag in option string, to get its type (if any).
-             # Disallow & as literal flag.
-             else if (!(Pos = index(OptList,Option)) || Option == "&") {
-                 if (AllowUnrecOpt) {
-                     Escape = 1
-                     break
-                 }
-                 else {
-                     OptErr = "Invalid option: " specGiven Option
-                     return -3
-                 }
-             }
-             # Find what the value of the option will be if it takes one.
-             # NeedNextOpt is true if the option specifier is the last char of
-             # this arg, which means that if the option requires a value it is
-             # the next arg.
-             if (NeedNextOpt = (ArgInd >= ArgLen)) { # Value is the next arg
-                 if (GotValue = ArgNum + 1 < argc)
-                     Value = argv[ArgNum+1]
-             }
-             else { # Value is included with option
-                 Value = substr(Arg,ArgInd + 1)
-                 GotValue = 1
-             }
-             if (HadValue = AssignVal(Option,Value,Options,
-             substr(OptList,Pos + 1,1),GotValue,"",++OptionNum,!NeedNextOpt,
-             specGiven)) {
-                 if (HadValue < 0) # error occurred
-                     return HadValue
-                 if (HadValue == 2)
-                     ArgInd++ # Account for the single-char value we used.
-                 else {
-                     if (NeedNextOpt) { # option took next arg as value
-                         delete argv[++ArgNum]
-                         ArgsLeft--
-                     }
-                     break # This option has been used up
-                 }
-             }
-         }
-         if (Escape)
-             break
-         # Do not delete arg until after processing of it, so that if it is not
-         # recognized it can be left in argv[].
-         delete argv[ArgNum]
-         ArgsLeft--
-     }
-     if (compress != 0) {
-         # Pack the array that was passed in (argv), which need not be the
-         # global ARGV.
-         dest = 1
-         src = argc - ArgsLeft + 1
-         if (src != dest) {
-             for (count = ArgsLeft - 1; count; count--) {
-                 argv[dest] = argv[src]
-                 dest++
-                 src++
-             }
-             # Remove the now-duplicated elements left at the top
-             for (; dest < src; dest++)
-                 delete argv[dest]
-         }
-     }
-     return ArgsLeft
- }
-
- # Assignment to values in Options[] occurs only in this function.
- # Option: Option specifier character.
- # Value: Value to be assigned to option, if it takes a value.
- # Options[]: Options array to return values in.
- # ArgType: Argument type specifier character.
- # GotValue: Whether any value is available to be assigned to this option.
- # Name: Name of option being processed.
- # OptionNum: Number of this option (starting with 1) if set in argv[],
- # or 0 if it was given in a config file or in the environment.
- # SingleOpt: true if the value (if any) that is available for this option was
- # given as part of the same command line arg as the option. Used only for
- # options from the command line.
- # specGiven is the option specifier character used, if any (e.g. - or +),
- # for use in error messages.
- # Global variables: OptErr
- # Return value: negative value on error, 0 if option did not require an
- # argument, 1 if it did & used the whole arg, 2 if it required just one char of
- # the arg.
- # Current error values:
- # -1: Option that required an argument did not get it.
- # -2: Value of incorrect type supplied for option.
- # -3: Bad type given for option &
- # Instance is a local: it is the instance number assigned to this occurrence
- # of the option, and stays unset if the occurrence is ignored because the
- # flag was previously turned off.
- function AssignVal(Option,Value,Options,ArgType,GotValue,Name,OptionNum,
- SingleOpt,specGiven, UsedValue,Err,NumTypes,Instance) {
-     # If option takes a value... [
-     NumTypes = "*()#<>]"
-     if (Option == "&" && ArgType !~ "[" NumTypes) { # ]
-         OptErr = "Bad type given for & option"
-         return -3
-     }
-     if (UsedValue = (ArgType ~ "[:;" NumTypes)) { # ]
-         if (!GotValue) {
-             if (Name != "")
-                 OptErr = "Variable requires a value -- " Name
-             else
-                 OptErr = "option requires an argument -- " Option
-             return -1
-         }
-         if ((Err = CheckType(ArgType,Value,Option,Name,specGiven)) != "") {
-             OptErr = Err
-             return -2
-         }
-         # Mark this as a numeric variable; will be propagated to Options[] val.
-         if (ArgType != ":" && ArgType != ";")
-             Value += 0
-         if ((Instance = ++Options[Option,"count"]) > 1)
-             Options[Option,Instance] = Value
-         else
-             Options[Option] = Value
-     }
-     # If this is an environ or rcfile assignment & it was given a value...
-     else if (!OptionNum && Value != "") {
-         UsedValue = 1
-         # If the value is "0" or "-" and this is the first instance of it,
-         # do not set Options[Option]; this allows an assignment in an rcfile to
-         # turn off an option (for the simple "Option in Options" test) in such
-         # a way that it cannot be turned on in a later file.
-         if (!(Option in Options) && (Value == "0" || Value == "-"))
-             Instance = 1
-         else
-             Instance = ++Options[Option]
-         # Save the value even though this is a flag
-         Options[Option,Instance] = Value
-     }
-     # If this is a command line flag and has a - following it in the same arg,
-     # it is being turned off.
-     else if (OptionNum && SingleOpt && substr(Value,1,1) == "-") {
-         UsedValue = 2
-         if (Option in Options)
-             Instance = ++Options[Option]
-         else
-             Instance = 1
-         Options[Option,Instance]
-     }
-     # If this is a flag assignment without a value, increment the count for the
-     # flag unless it was turned off. The indicator for a flag being turned off
-     # is that the flag index has not been set in Options[] but it has an
-     # instance count.
-     else if (Option in Options || !((Option,1) in Options))
-         # Increment number of times this flag seen; will inc null value to 1
-         Instance = ++Options[Option]
-     # Record the sequence number only if this occurrence was given an
-     # instance number above.
-     if (Instance != "")
-         Options[Option,"num",Instance] = OptionNum
-     return UsedValue
- }
-
- # CheckType: check that Value is acceptable for an option of type ArgType.
- # Option is the option letter
- # Value is the value being assigned
- # Name is the var name of the option, if any
- # ArgType is one of:
- # : String argument
- # ; Non-null string argument
- # * Floating point argument
- # ( Non-negative floating point argument
- # ) Positive floating point argument
- # # Integer argument
- # < Non-negative integer argument
- # > Positive integer argument
- # specGiven is the option specifier character used, if any (e.g. - or +),
- # for use in error messages.
- # Returns null on success, err string on error.  The error string names the
- # variable (if Name is given) or else the option character.
- function CheckType(ArgType,Value,Option,Name,specGiven, Err,ErrStr) {
-     if (ArgType == ":")
-         return ""
-     if (ArgType == ";") {
-         if (Value == "")
-             Err = "must be a non-empty string"
-     }
-     # A number begins with optional + or -, and is followed by a string of
-     # digits or a decimal with digits before it, after it, or both
-     else if (Value !~ /^[-+]?([0-9]+|[0-9]*\.[0-9]+|[0-9]+\.)$/)
-         Err = "must be a number"
-     else if (ArgType ~ "[#<>]" && Value ~ /\./)
-         Err = "may not include a fraction"
-     # Value is known to look like a number here; adding 0 forces the range
-     # checks to be numeric comparisons even if Value is held as a string.
-     else if (ArgType ~ "[()<>]" && (Value + 0) < 0)
-         Err = "may not be negative"
-     # (
-     else if (ArgType ~ "[)>]" && (Value + 0) == 0)
-         Err = "must be a positive number"
-     if (Err == "")
-         return ""
-     ErrStr = "Bad value \"" Value "\". Value assigned to "
-     if (Name != "")
-         return ErrStr "variable " Name " " Err
-     # For the numeric pseudo-option &, show the first character of the
-     # value in place of an option letter.
-     if (Option == "&")
-         Option = Value
-     return ErrStr "option " specGiven substr(Option,1,1) " " Err
- }
-
- # Note: only the above functions are needed by ProcArgs.
- # The rest of these functions call ProcArgs() and also do other
- # option-processing stuff.
-
- # Opts: Process command line arguments.
- # Opts hands the command line to ProcArgs(), then applies rcfile/environment
- # settings through InitOpts() and exclusion rules through ExclusiveOptions().
- # If any of these steps reports an error, a message is printed on /dev/stderr
- # and the program is exited with status 1.
- #
- # Input variables:
- # Name is the name of the program, for error messages.
- # Usage is a usage message, for error messages.
- # OptList is the option description string, as used by ProcArgs().
- # MinArgs is the minimum number of non-option arguments that this
- # program should have, not including ARGV[0] and +h.
- # If the program does not require any non-option arguments,
- # MinArgs should be omitted or given as 0.
- # rcFiles, if given, is a colon-separated list of filenames to read for
- # variable initialization. If a filename begins with ~/, the ~ is replaced
- # by the value of the environment variable HOME. If a filename begins with
- # $, the part from the character after the $ up until (but not including)
- # the first character not in [a-zA-Z0-9_] will be searched for in the
- # environment; if found its value will be substituted, if not the filename will
- # be discarded.
- # rcfiles are read in the order given.
- # Values given in them will not override values given on the command line,
- # and values given in later files will not override those set in earlier
- # files, because AssignVal() will store each with a different instance index.
- # The first instance of each variable, either on the command line or in an
- # rcfile, will be stored with no instance index, and this is the value
- # normally used by programs that call this function.
- # VarNames is a comma-separated list of variable names to map to options,
- # in the same order as the options are given in OptList.
- # If EnvSearch is given and nonzero, the first EnvSearch variables will also be
- # searched for in the environment. If set to -1, all values will be searched
- # for in the environment. Values given in the environment will override
- # those given in the rcfiles but not those given on the command line.
- # NoRCopt, if given, is an additional letter option that if given on the
- # command line prevents the rcfiles and environment from being read.
- # See ProcArgs() for a description of AllowUnrecOpt and optChars, and
- # ExclusiveOptions() for a description of exOpts.
- # Special options:
- # If x is made an option and is given, some debugging info is output.
- # h is assumed to be the help option; when it is given, the argument-count
- # check is not applied.
- #
- # Global variables:
- # The command line arguments are taken from ARGV[].
- # The arguments that are option specifiers and values are removed from
- # ARGV[], leaving only ARGV[0] and the non-option arguments.
- # The number of elements in ARGV[] should be in ARGC.
- # After processing, ARGC is set to the number of elements left in ARGV[].
- # The option values are put in Options[].
- # On error, Err is set to a positive integer value so it can be checked for in
- # an END block.
- # Return value: The number of elements left in ARGV is returned.
- # Must keep OptErr global since it may be set by InitOpts().
- function Opts(Name,Usage,OptList,MinArgs,rcFiles,VarNames,EnvSearch,NoRCopt,
- AllowUnrecOpt,optChars,exOpts, ArgsLeft,e) {
-     if (MinArgs == "")
-         MinArgs = 0
-     ArgsLeft = ProcArgs(ARGC,ARGV,OptList NoRCopt,Options,1,AllowUnrecOpt,
-         optChars)
-     # Too few arguments, or a negative (error) result from ProcArgs().
-     if (!("h" in Options) && ArgsLeft < (MinArgs+1)) {
-         if (ArgsLeft < 0)
-             # ProcArgs() failed; its negated result becomes the error code.
-             Err = -ArgsLeft
-         else {
-             OptErr = "Not enough arguments"
-             Err = 4
-         }
-         print mangleHelp(sprintf("%s: %s.\nUse -h for help.\n%s",
-             Name,OptErr,Usage)," \t\n[") > "/dev/stderr"
-         exit 1
-     }
-     # Read the rcfiles and environment unless the NoRCopt option was given.
-     if (rcFiles != "" && !(NoRCopt != "" && (NoRCopt in Options))) {
-         e = InitOpts(rcFiles,Options,OptList,VarNames,EnvSearch)
-         if (e < 0) {
-             print Name ": " OptErr ".\nUse -h for help." > "/dev/stderr"
-             Err = -e
-             exit 1
-         }
-     }
-     # Reject combinations of mutually exclusive options.
-     if (exOpts != "") {
-         OptErr = ExclusiveOptions(exOpts,Options)
-         if (OptErr != "") {
-             printf "%s: Error: %s\n",Name,OptErr > "/dev/stderr"
-             Err = 1
-             exit 1
-         }
-     }
-     return ArgsLeft
- }
-
- # mangleHelp: If this is not gawk, convert -x options in a help message to +x.
- # gawk is recognized by IGNORECASE having the string value "0"; under any
- # other value the message is rewritten.
- # whitespace is the set of characters that may precede an option indicator to
- # mark it as such. If it is null, the default is space, tab, or newline.
- # Returns the (possibly rewritten) message.
- function mangleHelp(message,whitespace, i,w) {
-     if (IGNORECASE "" == "0")
-         return message
-     if (whitespace == "")
-         whitespace = " \t\n"
-     # Replace "<char>-" with "<char>+" for each lead-in character in turn.
-     i = 1
-     while ((w = substr(whitespace,i,1)) != "") {
-         gsub("\\" w "-",w "+",message)
-         i++
-     }
-     return message
- }
-
- # ReadConfigFile(): Read a file containing var/value assignments, in the form
- # <variable-name><assignment-char><value>.
- # Whitespace (spaces and tabs) around a variable (leading whitespace on the
- # line and whitespace between the variable name and the assignment character)
- # is stripped. Lines that do not contain an assignment operator or which
- # contain a null variable name are ignored, other than possibly being noted in
- # the return value. If more than one assignment is made to a variable, the
- # first assignment is used. Variables already present in Lines[] on entry are
- # likewise left untouched.
- # Input variables:
- # File is the file to read.
- # Comment is the line-comment character. If it is found as the first non-
- # whitespace character on a line, the line is ignored.
- # Assign is the assignment string. The first instance of Assign on a line
- # separates the variable name from its value.
- # If StripWhite is true, whitespace around the value (whitespace between the
- # assignment char and trailing whitespace on the line) is stripped.
- # VarPat is a pattern that variable names must match.
- # Example: "^[a-zA-Z][a-zA-Z0-9]+$"
- # If it is null, any non-null variable name is accepted.
- # If FlagsOK is true, variables are allowed to be "set" by being put alone on
- # a line; no assignment operator is needed. These variables are set in
- # the output array with a null value. Lines containing nothing but
- # whitespace are still ignored.
- # Output variables:
- # Values[] contains the assignments, with the indexes being the variable names
- # and the values being the assigned values.
- # Lines[] contains the line number that each variable occurred on. A flag set
- # is recorded by giving it an index in Lines[] but not in Values[].
- # Return value:
- # If any errors occur, a string consisting of descriptions of the errors
- # separated by newlines is returned. In no case will the string start with a
- # numeric value. If no errors occur, the number of lines read is returned
- # (0 for an empty file).
- function ReadConfigFile(Values,Lines,File,Comment,Assign,StripWhite,VarPat,
- FlagsOK,
- Line,Status,Errs,AssignLen,LineNum,Var,Val,Pos) {
-     # Anchor the comment character so it only matches at the start of the
-     # (already left-trimmed) line.
-     if (Comment != "")
-         Comment = "^" Comment
-     AssignLen = length(Assign)
-     if (VarPat == "")
-         VarPat = "." # null varname not allowed
-     while ((Status = (getline Line < File)) == 1) {
-         LineNum++
-         sub("^[ \t]+","",Line)
-         if (Line == "") # blank line
-             continue
-         if (Comment != "" && Line ~ Comment)
-             continue
-         # Pos is kept local so that it does not clobber a global of the same
-         # name in the calling program.
-         if (Pos = index(Line,Assign)) {
-             Var = substr(Line,1,Pos-1)
-             Val = substr(Line,Pos+AssignLen)
-             if (StripWhite) {
-                 sub("^[ \t]+","",Val)
-                 sub("[ \t]+$","",Val)
-             }
-         }
-         else {
-             Var = Line # If no value, var is entire line
-             Val = ""
-         }
-         if (!FlagsOK && Val == "") {
-             Errs = Errs \
-                 sprintf("\nBad assignment on line %d of file %s: %s",
-                 LineNum,File,Line)
-             continue
-         }
-         sub("[ \t]+$","",Var)
-         if (Var !~ VarPat) {
-             Errs = Errs sprintf("\nBad variable name on line %d of file %s: %s",
-                 LineNum,File,Var)
-             continue
-         }
-         # Only the first occurrence of a variable is recorded.
-         if (!(Var in Lines)) {
-             Lines[Var] = LineNum
-             # A bare flag (no assignment operator) gets no entry in Values[].
-             if (Pos)
-                 Values[Var] = Val
-         }
-     }
-     # getline ended with a nonzero status: a read error rather than end of file.
-     if (Status)
-         Errs = Errs "\nCould not read file " File
-     close(File)
-     # LineNum+0 makes an empty file yield 0 rather than an uninitialized value.
-     return Errs == "" ? LineNum+0 : substr(Errs,2) # Skip first newline
- }
-
- # InitOpts: Apply option settings from the environment and from rcfiles.
- # Variables:
- # Data is stored in Options[].
- # rcFiles, OptList, VarNames, and EnvSearch are as described for Opts().
- # Each name in VarNames is paired, in order, with the next option letter in
- # OptList; a type character following the previous letter is skipped.
- # rcfiles that cannot be opened are skipped silently, since they are optional.
- # Each rcfile is parsed on its own, so a variable set in several rcfiles is
- # passed to AssignVal() once per file, in the order the files are given.
- # Global vars:
- # Sets OptErr. Uses ENVIRON[].
- # If anything is read from any of the rcfiles, sets READ_RCFILE to 1.
- # If the x option is set, debugging info is printed on /dev/stderr.
- # Return value:
- # 0 on success. On error, a negative value: either the value returned by a
- # failed AssignVal() call, or -1 (with OptErr set) if an rcfile contains a
- # bad line or assigns to a variable that is not in VarNames.
- function InitOpts(rcFiles,Options,OptList,VarNames,EnvSearch,
- Line,Var,Pos,Vars,Map,CharOpt,NumVars,TypesInd,Types,Type,Ret,i,rcFile,
- fNames,numrcFiles,filesRead,Err,Values,retStr,Lines,envvar) {
-     split("",filesRead,"") # make awk know this is an array
-     NumVars = split(VarNames,Vars,",")
-     TypesInd = Ret = 0
-     if (EnvSearch == -1)
-         EnvSearch = NumVars
-     for (i = 1; i <= NumVars; i++) {
-         Var = Vars[i]
-         CharOpt = substr(OptList,++TypesInd,1)
-         # Step over the type character belonging to the previous option.
-         if (CharOpt ~ "^[:;*()#<>&]$")
-             CharOpt = substr(OptList,++TypesInd,1)
-         Map[Var] = CharOpt
-         Types[Var] = Type = substr(OptList,TypesInd+1,1)
-         # Environment values are assigned here, before any rcfile is read.
-         # The final argument presumably tags the source of the value
-         # ("e" = environment, "f" = file).
-         if (i <= EnvSearch && Var in ENVIRON &&
-             (Err = AssignVal(CharOpt,ENVIRON[Var],Options,Type,1,Var,"e")) < 0)
-             return Err
-     }
-     numrcFiles = split(rcFiles,fNames,":")
-     for (i = 1; i <= numrcFiles; i++) {
-         rcFile = fNames[i]
-         if (rcFile ~ "^~/")
-             rcFile = ENVIRON["HOME"] substr(rcFile,2)
-         else if (rcFile ~ /^\$/) {
-             # $NAME/...: substitute the environment variable NAME, or drop the
-             # file if NAME is not set.
-             rcFile = substr(rcFile,2)
-             match(rcFile,"^[a-zA-Z0-9_]*")
-             envvar = substr(rcFile,1,RLENGTH)
-             if (envvar in ENVIRON)
-                 rcFile = ENVIRON[envvar] substr(rcFile,RLENGTH+1)
-             else
-                 continue
-         }
-         if (rcFile in filesRead)
-             continue
-         # rcfiles are liable to be given more than once, e.g. UHOME and HOME
-         # may be the same
-         filesRead[rcFile]
-         if ("x" in Options)
-             printf "Reading configuration file %s\n",rcFile > "/dev/stderr"
-         # rcfiles are optional: skip one that cannot be opened rather than
-         # treating it as an error.
-         if ((getline Line < rcFile) < 0)
-             continue
-         close(rcFile) # rewind so that ReadConfigFile() sees the whole file
-         # Start each file with empty tables so that settings from an earlier
-         # file are not assigned a second time or blamed on this file.
-         split("",Values,"")
-         split("",Lines,"")
-         retStr = ReadConfigFile(Values,Lines,rcFile,"#","=",0,"",1)
-         # A purely numeric result is a line count; anything else that is
-         # non-null is an error description. The pattern test avoids comparing
-         # an error string with a number, which awk does as a string comparison.
-         if (retStr ~ /^[0-9]+$/) {
-             if (retStr+0 > 0)
-                 READ_RCFILE = 1
-         }
-         else if (retStr != "") {
-             OptErr = retStr
-             Ret = -1
-         }
-         for (Var in Lines)
-             if (Var in Map) {
-                 # A variable with no entry in Values[] was given as a bare
-                 # flag; it is assigned a null value.
-                 if ((Err = AssignVal(Map[Var],Var in Values ? Values[Var] : "",
-                     Options,Types[Var],Var in Values,Var,"f")) < 0)
-                     return Err
-             }
-             else {
-                 OptErr = sprintf(\
-                     "Unknown var \"%s\" assigned to on line %d\nof file %s",Var,
-                     Lines[Var],rcFile)
-                 Ret = -1
-             }
-     }
-     # Debugging output: show what each variable ended up as.
-     if ("x" in Options)
-         for (Var in Map)
-             if (Map[Var] in Options)
-                 printf "(%s) %s=%s\n",Map[Var],Var,Options[Map[Var]] > \
-                     "/dev/stderr"
-             else
-                 printf "(%s) %s not set\n",Map[Var],Var > "/dev/stderr"
-     return Ret
- }
-
- # ExclusiveOptions: report options that may not be given together.
- # OptSets is a semicolon-separated list of groups; each group is a
- # comma-separated list of option sets, and each option set is a string of
- # option letters. Within a group, it is an error for an option from one set
- # and an option from a different set to both be in Options[].
- # Example: if OptSets = "ab,def,g;i,j", the first group (ab,def,g) produces
- # an error if:
- # (a or b is in Options[]) AND (d, e, or f is in Options[]) OR
- # (a or b is in Options[]) AND (g is in Options[]) OR
- # (d, e, or f is in Options[]) AND (g is in Options[])
- # and the second group (i,j) produces an error if:
- # (i is in Options[]) AND (j is in Options[]).
- # Return value: one "Cannot give both X and Y options." line per conflicting
- # pair, separated by newlines, or a null string if there are no conflicts.
- # todo: make options given on command line unset options given in config file
- # todo: that they conflict with.
- function ExclusiveOptions(OptSets,Options,
- group,g,nGroups,member,nMembers,a,b,p,q,lenA,lenB,optA,optB,msgs) {
-     nGroups = split(OptSets,group,";")
-     for (g = 1; g <= nGroups; g++) {
-         nMembers = split(group[g],member,",")
-         # Pair every set with each set that follows it; pairs with earlier
-         # sets were already examined when those sets were the first of the pair.
-         for (a = 1; a < nMembers; a++) {
-             lenA = length(member[a])
-             for (p = 1; p <= lenA; p++) {
-                 optA = substr(member[a],p,1)
-                 if (!(optA in Options))
-                     continue
-                 for (b = a+1; b <= nMembers; b++) {
-                     lenB = length(member[b])
-                     for (q = 1; q <= lenB; q++) {
-                         optB = substr(member[b],q,1)
-                         if (optB in Options)
-                             msgs = msgs "\n"\
-                                 sprintf("Cannot give both %s and %s options.",
-                                 optA,optB)
-                     }
-                 }
-             }
-         }
-     }
-     # Drop the newline that precedes the first message.
-     return msgs == "" ? "" : substr(msgs,2)
- }
-
- # Opt2Set: The value of each instance of option Opt that occurs in Options[]
- # is made an index of Set[].
- # The first instance is read from Options[Opt], instances 2 and up from
- # Options[Opt,n], and the number of instances from Options[Opt,"count"].
- # The return value is the number of instances of Opt in Options (0 if Opt is
- # not in Options[]).
- function Opt2Set(Options,Opt,Set, count,i) {
-     if (!(Opt in Options))
-         return 0
-     Set[Options[Opt]]
-     count = Options[Opt,"count"] + 0
-     # Walk the instances with a separate index so that count is still intact
-     # when it is returned.
-     for (i = count; i > 1; i--)
-         Set[Options[Opt,i]]
-     # Opt is in Options[], so there is at least one instance even if no count
-     # was recorded.
-     return count < 1 ? 1 : count
- }
-
- # Opt2Sets: Split the values given for option Opt into two sets.
- # The value of each instance of option Opt that occurs in Options[] that
- # begins with "!" is made an index of nSet[] (with the ! stripped from it).
- # Other values are made indexes of Set[].
- # The return value is whatever Opt2Set() returns for Opt, i.e. the number of
- # instances of Opt in Options.
- function Opt2Sets(Options,Opt,Set,nSet, aSet,ret,value) {
-     # Collect all values first, then sort them into the two output sets.
-     ret = Opt2Set(Options,Opt,aSet)
-     # value is declared local so that the loop does not clobber a global of
-     # the same name in the calling program.
-     for (value in aSet)
-         if (substr(value,1,1) == "!")
-             nSet[substr(value,2)]
-         else
-             Set[value]
-     return ret
- }
-
- # OptsGiven: Returns true if any option in the string OptLetters was given,
- # as indicated by the data in Options[]. If any of Arg, Env, or File are
- # true, the given opts are only considered to have been set if they were set
- # in the command line arguments, environment, or in a configuration file,
- # respectively. If none of them is true, all three sources are accepted.
- # The source of instance j of option X is read from Options[X,"num",j]:
- # a positive number for a command line argument, "e" for the environment,
- # "f" for a configuration file.
- # The second parameter is not named Opts because awk does not allow a
- # parameter to have the same name as a function, and this file defines
- # Opts(). Parameters are positional, so callers are not affected.
- function OptsGiven(Options,OptLetters,Arg,Env,File, l,i,Opt,j,c) {
-     if (!Arg && !Env && !File)
-         Arg = Env = File = 1
-     l = length(OptLetters)
-     for (i = 1; i <= l; i++) {
-         Opt = substr(OptLetters,i,1)
-         # Examine every recorded instance of this option.
-         for (j = 1; (Opt,"num",j) in Options; j++) {
-             c = Options[Opt,"num",j]
-             if (Arg && c+0 > 0 || File && c == "f" || Env && c == "e")
-                 return 1
-         }
-     }
-     return 0
- }
- ### End of ProcArgs library
-